This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# general visualisation
library('ggplot2') # visualisation
library('scales') # visualisation
library('patchwork') # visualisation
library('RColorBrewer') # visualisation
library('corrplot') # visualisation
## corrplot 0.84 loaded
# general data manipulation
library('dplyr') # data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('readr') # input/output
## 
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
## 
##     col_factor
library('vroom') # input/output
library('skimr') # overview
library('tibble') # data wrangling
library('tidyr') # data wrangling
library('purrr') # data wrangling
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:scales':
## 
##     discard
library('stringr') # string manipulation
library('forcats') # factor manipulation

# specific visualisation
library('alluvial') # visualisation
library('ggrepel') # visualisation
library('ggforce') # visualisation
library('ggridges') # visualisation
library('gganimate') # animations
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
library('GGally') # visualisation
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
library('ggthemes') # visualisation
library('wesanderson') # visualisation
library('kableExtra') # display
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Date + forecast
library('lubridate') # date and time
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:dplyr':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library('forecast') # time series analysis
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
#library('prophet') # time series analysis
library('timetk') # time series analysis

# Interactivity
library('crosstalk')
library('plotly')
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# parallel
library('foreach')
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
library('doParallel')
## Loading required package: iterators
## Loading required package: parallel
library(vroom)
library(stringr)
library(tidyverse)
# All input CSVs live in one project folder; build paths with file.path()
# instead of wrapping each literal in a redundant str_c() call.
data_dir <- "/Coursework/Timeseries/Timeseries_project/CSV files"

train <- vroom(file.path(data_dir, "sales_train_validation.csv"), delim = ",", col_types = cols())
prices <- vroom(file.path(data_dir, "sell_prices.csv"), delim = ",", col_types = cols())
calendar <- read_csv(file.path(data_dir, "calendar.csv"), col_types = cols())

sample_submit <- vroom(file.path(data_dir, "sample_submission.csv"), delim = ",", col_types = cols())
# Reshape a wide sales table (one "d_<n>" column per day) into a long
# (id, dates, sales) tibble, mapping day numbers to real calendar dates
# and stripping the "_validation" suffix from the ids.
extract_ts <- function(df){
  # Day 1 ("d_1") corresponds to 2011-01-29 in the M5 data.
  start_date <- as.Date("2011-01-29")

  long <- df %>%
    select(id, starts_with("d_")) %>%
    pivot_longer(starts_with("d_"), names_to = "dates", values_to = "sales")

  long %>%
    mutate(dates = start_date + as.integer(str_remove(dates, "d_")) - 1) %>%
    mutate(id = str_remove(id, "_validation"))
}

# Reproducible sample of 50 series for the interactive explorer below.
set.seed(4321)
foo <- train %>% 
  sample_n(50)

ts_out <- extract_ts(foo)

# One colour per sampled id, recycling a 7-colour brewer palette.
# NOTE(review): this `cols` masks readr::cols() in the global environment.
# The file readers above have already run, but any later col_types = cols()
# call would break -- consider renaming.
cols <- ts_out %>% 
  distinct(id) %>% 
  mutate(cols = rep_len(brewer.pal(7, "Set2"), length.out = n_distinct(ts_out$id)))

ts_out <- ts_out %>% 
  left_join(cols, by = "id")

# Named colour vector (id -> colour) for scale_color_manual().
pal <- cols$cols %>%
   setNames(cols$id)
# Crosstalk-shared data so the select widget and plotly plot stay linked.
shared_ts <- highlight_key(ts_out)

# Sets the base-graphics palette; presumably unused by the ggplot below --
# TODO confirm this line is needed.
palette(brewer.pal(9, "Set3"))

# Interactive line chart of the 50 sampled series; a crosstalk
# filter_select widget lets the viewer pick individual ids.
gg <- shared_ts %>% 
  ggplot(aes(dates, sales, col = id, group = id)) +
  geom_line() +
  scale_color_manual(values = pal) +
  labs(x = "Date", y = "Sales") +
  theme_tufte() +
  NULL

# BUG FIX: this object was originally named `filter`, which masked
# dplyr::filter / stats::filter in the global environment. R's call lookup
# skips non-function bindings so the later filter() calls still worked,
# but the masking is an accident waiting to happen -- use a distinct name.
ts_filter <- bscols(
  filter_select("ids", "Sales over time: Select a time series ID (remove with backspace key, navigate with arrow keys):", shared_ts, ~id, multiple = TRUE),
  ggplotly(gg, dynamicTicks = TRUE),
  widths = c(12, 12)
)
## Warning: "Sum of bscol width units is greater than 12" -- expected here;
## widths = c(12, 12) stacks the widget and the plot as two full-width rows.
bscols(ts_filter)
# Collapse the whole training set to a single total-sales series and plot
# it interactively. The dummy id = 1 satisfies extract_ts()'s interface.
total_wide <- train %>%
  summarise_at(vars(starts_with("d_")), sum) %>%
  mutate(id = 1)

total_ts <- extract_ts(total_wide)

agg_plot <- ggplot(total_ts, aes(dates, sales)) +
  geom_line(col = "blue") +
  theme_tufte() +
  labs(x = "Date", y = "Sales", title = "All aggregate sales")

ggplotly(agg_plot, dynamicTicks = TRUE)
# NOTE(review): this chunk is a verbatim duplicate of the previous one
# (total aggregate sales); kept as-is to preserve the rendered output,
# but one of the two could be removed.
foo <- train %>% 
  summarise_at(vars(starts_with("d_")), sum) %>% 
  mutate(id = 1)

bar <- extract_ts(foo)

gg <- bar %>% 
  ggplot(aes(dates, sales)) +
  geom_line(col = "blue") +
  theme_tufte() +
  labs(x = "Date", y = "Sales", title = "All aggregate sales")

ggplotly(gg, dynamicTicks = TRUE)
# Aggregate daily sales to state level.
state_totals <- train %>%
  group_by(state_id) %>%
  summarise_at(vars(starts_with("d_")), sum) %>%
  rename(id = state_id)

# Roll the daily series up to monthly totals. Each month is stamped with
# its first date; the trailing partial month is dropped.
monthly_state <- extract_ts(state_totals) %>%
  mutate(month = month(dates), year = year(dates)) %>%
  group_by(month, year, id) %>%
  summarise(sales = sum(sales), dates = min(dates)) %>%
  ungroup() %>%
  filter(str_detect(as.character(dates), "..-..-01")) %>%
  filter(dates != max(dates))

state_plot <- ggplot(monthly_state, aes(dates, sales, col = id)) +
  geom_line() +
  theme_tufte() +
  labs(x = "Date", y = "Sales", title = "Monthly Sales per State")

ggplotly(state_plot, dynamicTicks = TRUE)
# Daily sales aggregated per product category...
foo <- train %>%
  group_by(cat_id) %>% 
  summarise_at(vars(starts_with("d_")), sum) %>% 
  rename(id = cat_id)

# ...and per store.
bar <- train %>%
  group_by(store_id) %>% 
  summarise_at(vars(starts_with("d_")), sum) %>% 
  rename(id = store_id)

# p1: monthly sales per category (complete months only: monthly totals are
# stamped with the month's first date; the partial final month is dropped).
p1 <- extract_ts(foo) %>% 
  mutate(month = month(dates),
         year = year(dates)) %>% 
  group_by(month, year, id) %>% 
  summarise(sales = sum(sales),
            dates = min(dates)) %>% 
  ungroup() %>% 
  filter(str_detect(as.character(dates), "..-..-01")) %>% 
  filter(dates != max(dates)) %>% 
  ggplot(aes(dates, sales, col = id)) +
  geom_line() +
  theme_hc() +
  theme(legend.position = "none") +
  labs(title = "Sales per Category", x = "Date", y = "Sales")

# p2: number of training rows (items) per category.
p2 <- train %>% 
  count(cat_id) %>% 
  ggplot(aes(cat_id, n, fill = cat_id)) +
  geom_col() +
  theme_hc() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size = 7)) +
  labs(x = "", y = "", title = "Rows per Category")

# p3: monthly sales per store, facetted by state (first two characters of
# the store id, e.g. "CA_1" -> "CA").
p3 <- extract_ts(bar) %>% 
  mutate(month = month(dates),
         year = year(dates)) %>% 
  group_by(month, year, id) %>% 
  summarise(sales = sum(sales),
            dates = min(dates)) %>% 
  ungroup() %>% 
  filter(str_detect(as.character(dates), "..-..-01")) %>% 
  filter(dates != max(dates)) %>% 
  mutate(state_id = str_sub(id, 1, 2)) %>% 
  ggplot(aes(dates, sales, col = id)) +
  geom_line() +
  theme_hc() +
  theme(legend.position = "bottom") +
  labs(title = "Sales per Store", x = "Date", y = "Sales", col = "Store ID") +
  facet_wrap(~state_id)

# patchwork design: p1 and p2 share the top row, p3 spans the bottom row.
layout <- "
AAB
CCC
"

p1 + p2 + p3 + plot_layout(design = layout)

# Day "d_1" corresponds to this calendar date in the M5 data.
min_date <- date("2011-01-29")

# Daily sales per department x state, pivoted to long format (same logic
# as extract_ts(), but keeping both grouping columns).
foo <- train %>%
  group_by(dept_id, state_id) %>% 
  summarise_at(vars(starts_with("d_")), sum) %>% 
  ungroup() %>% 
  select(ends_with("id"), starts_with("d_")) %>%  
  pivot_longer(starts_with("d_"), names_to = "dates", values_to = "sales") %>%
  mutate(dates = as.integer(str_remove(dates, "d_"))) %>% 
  mutate(dates = min_date + dates - 1)

# Monthly totals (complete months only), one facet per department x state.
foo %>% 
  mutate(month = month(dates),
         year = year(dates)) %>% 
  group_by(month, year, dept_id, state_id) %>% 
  summarise(sales = sum(sales),
            dates = min(dates)) %>% 
  ungroup() %>% 
  filter(str_detect(as.character(dates), "..-..-01")) %>% 
  filter(dates != max(dates)) %>% 
  ggplot(aes(dates, sales, col = dept_id)) +
  geom_line() +
  facet_grid(state_id ~ dept_id) +
  theme_tufte() +
  theme(legend.position = "none", strip.text.x = element_text(size = 8)) +
  labs(title = "Sales per Department and State", x = "Date", y = "Sales")

# Total daily sales, excluding Christmas days (stores closed, zero sales),
# which would otherwise distort the smoother.
foo <- train %>% 
  summarise_at(vars(starts_with("d_")), sum) %>% 
  mutate(id = 1)

bar <- extract_ts(foo) %>% 
  filter(!str_detect(as.character(dates), "-12-25"))

# Long-window loess trend over days-since-start.
# NOTE(review): in a formula the trailing "+ 1" is redundant -- an
# intercept is included by default.
loess_all <- predict(loess(bar$sales ~ as.integer(bar$dates - min(bar$dates)) + 1, span = 1/2, degree = 1))

bar <- bar %>% 
  mutate(loess = loess_all) %>% 
  mutate(sales_rel = sales - loess)  # detrended residual

# p1: raw series with the loess trend overlaid.
p1 <- bar %>% 
  ggplot(aes(dates, sales)) +
  geom_line(col = "blue", alpha = 0.5) +
  geom_line(aes(dates, loess), col = "black") +
  theme_hc() +
  labs(x = "", y = "Sales", title = "Total Sales with Smoothing Fit + Seasonality in Residuals")

# p2: weekday x month heatmap of the detrended residuals (in thousands),
# summed over all years, to expose weekly/annual seasonality.
p2 <- bar %>% 
  mutate(wday = wday(dates, label = TRUE, week_start = 1),
         month = month(dates, label = TRUE),
         year = year(dates)) %>% 
  group_by(wday, month, year) %>% 
  summarise(sales = sum(sales_rel)/1e3) %>%
  ggplot(aes(month, wday, fill = sales)) +
  geom_tile() +
  labs(x = "Month of the year", y = "Day of the week", fill = "Relative Sales [1k]") +
  scale_fill_distiller(palette = "Spectral") +
  theme_hc()

# Stack the two panels with patchwork.
p1 / p2

# Daily sales per state.
foo <- train %>%
  group_by(state_id) %>% 
  summarise_at(vars(starts_with("d_")), sum) %>% 
  rename(id = state_id)

# Per-state loess trend (Christmas days removed).
# NOTE(review): `mean_sales = (sales)` -- the parentheses are a no-op, so
# mean_sales is just the raw sales column and sales_rel divides by the raw
# daily value. `mean(sales)` was presumably intended; however the next
# chunk reads mean_sales as the raw per-state series, so confirm downstream
# usage before changing this.
bar <- extract_ts(foo) %>% 
  filter(!str_detect(as.character(dates), "-12-25")) %>% 
  group_by(id) %>% 
  mutate(loess = predict(loess(sales ~ as.integer(dates - min(dates)) + 1, span = 1/2, degree = 1)),
         mean_sales = (sales)) %>% 
  mutate(sales_rel = (sales - loess)/mean_sales)

# NOTE(review): p1 is assigned but never printed in this document.
p1 <- bar %>% 
  ggplot(aes(dates, sales, col = id)) +
  geom_line() +
  geom_line(aes(dates, loess), col = "black") +
  facet_wrap(~ id) +
  theme_tufte() +
  theme(legend.position = "none") +
  labs(x = "", y = "Sales", title = "Sales per State with Seasonalities")
# Split the stacked per-state series (mean_sales holds the raw daily sales;
# 1908 days per state after removing the five Christmas days) into one
# vector per state.
#
# BUG FIX: the original sliced fixed row ranges [1:1908], [1909:3816] and
# [3817:5724] and labelled them CA, WI, TX respectively -- but
# group_by(state_id) %>% summarise() returns groups in sorted order
# (CA, TX, WI), so the WI and TX slices were swapped. Subsetting by id is
# order-independent and immune to that mistake.
Calts <- bar$mean_sales[bar$id == "CA"]
WIts <- bar$mean_sales[bar$id == "WI"]
TXts <- bar$mean_sales[bar$id == "TX"]

# Weekly-frequency ts objects for the DLM analysis below.
CA.ts <- ts(Calts, start = 1, freq = 7)
WI.ts <- ts(WIts, start = 1, freq = 7)
TX.ts <- ts(TXts, start = 1, freq = 7)

{r} # library(dplyr) # wal <-read.csv(file='sales_train_validation.csv') # dt<-apply(wal, 1, function(r) any(r %in% c("CA"))) # Caldatachk<-as.matrix(wal[dt,7:1919]) # Calts<- colSums(Caldatachk) # dim(Calts) # Cal.ts <- ts(Calts,start=1,freq=7) #

{r} # dt2<-apply(wal, 1, function(r) any(r %in% c("TX"))) # sv<-as.matrix(wal[dt2,7:1919]) # TXdata<- colSums(sv) # TX.ts <- ts(TXdata,start=1,freq=7) # #

```{r}

dt3<-apply(wal, 1, function(r) any(r %in% c("WI")))

WIdata<- colSums(as.matrix(wal[dt3,7:1919]))

WI.ts <- ts(WIdata,start=1,freq=7)

plot(WI.ts,xlab="Time (in days)")

# Side-by-side daily series for the three states.
par(mfrow=c(1,3))
plot(CA.ts,xlab="Time (in days)")
plot(TX.ts,xlab="Time (in days)")
plot(WI.ts,xlab="Time (in days)")

The three time series, one for each state, are shown above. Each clearly exhibits seasonality, and Christmas Day has zero sales every year. We need to supply this extra information to the model (but how?)

# Combined (all-state) daily series.
totTs=CA.ts+TX.ts+WI.ts
plot(totTs,xlab="Time (in days)")

The combined time series shows heavy seasonality as well.

# Load 'dlm' for dynamic linear model (Kalman filter) machinery.
# FIX: use library() rather than require() -- require() only returns FALSE
# when the package is missing, so a failed load would otherwise surface
# much later with a confusing error.
library('dlm')
## Loading required package: dlm
## 
## Attaching package: 'dlm'
## The following object is masked from 'package:ggplot2':
## 
##     %+%

# Model the state series on the log scale to stabilise the variance
# (also switched `=` to the conventional `<-` assignment).
log.CA.ts <- log(CA.ts)
plot(log.CA.ts)

log.WI.ts <- log(WI.ts)
plot(log.WI.ts)

log.TX.ts <- log(TX.ts)
plot(log.TX.ts)

# DLM with polynomial second-order trend and seasonality modeled with a seasonal factor representation.
# State: local linear trend (level + slope) plus a trigonometric weekly
# seasonal (period 7). parm holds log-variances: observation noise, level
# innovation, slope innovation, seasonal innovation.
build <- function(parm) {
  dlmModPoly(order = 2, dV = exp(parm[1]), dW = c(exp(parm[2]),exp(parm[3]))) + dlmModTrig(s = 7, dV = 0, dW=exp(parm[4]))
}
# Maximum-likelihood fit for the CA series; convergence code 0 = success.
fit <- dlmMLE(log.CA.ts, rep(0,4), build)
fit$convergence
## [1] 0
# BIC = -2 log-lik + k log(n); dlmMLE's $value is the negative log-lik.
BIC.2nd.seasfactor <-  2 *  fit$value + length(fit$par) * log(length(log.CA.ts))
print("BIC")
## [1] "BIC"
BIC.2nd.seasfactor
## [1] -7187.523
model2 <- build(fit$par)  # rebuild the model at the MLE parameter values
print("Observational noise from MLE")
## [1] "Observational noise from MLE"
model2$V
##             [,1]
## [1,] 0.003291634
print("Innovation variance matrix diagonal elements from MLE")
## [1] "Innovation variance matrix diagonal elements from MLE"
model2$W[1:2,1:2]
##              [,1]         [,2]
## [1,] 0.0007868325 0.000000e+00
## [2,] 0.0000000000 6.407079e-14
# Kalman filter of the CA series under the fitted model.
log.CA.filt2 <- dlmFilter(log.CA.ts, model2)

# Filtering covariance matrices for the CA model, recovered from the SVD
# factors stored by dlmFilter.
cov.filt <- with(log.CA.filt2, dlmSvd2var(U.C, D.C))

# Inspect each harmonic of the weekly seasonal component (state indices
# 2, 4, 6, 8). For every harmonic we draw (a) the filtered estimate with
# a 95% band and (b) the standardised estimate against +/-1.96, skipping
# the first 8 time points while the filter burns in.
for (seas.term in c(2, 4, 6, 8)) {
  sd.seasonality.filt2 <- rep(NA, length(cov.filt))
  for (i in seq_along(cov.filt)) {
    sd.seasonality.filt2[i] <- sqrt(cov.filt[[i]][seas.term, seas.term])
  }
  sd.seasonality.filt2 <- ts(sd.seasonality.filt2[-(1:8)], frequency = 7)
  seasonality.filt2 <- ts(log.CA.filt2$m[-(1:8), seas.term], frequency = 7)

  # Filtered component with 95% interval.
  ll <- seasonality.filt2 - 1.96 * sd.seasonality.filt2
  ul <- seasonality.filt2 + 1.96 * sd.seasonality.filt2
  plot(seasonality.filt2, ylim = c(min(ll), max(ul)))
  lines(ll, lty = 2, col = "green")
  lines(ul, lty = 2, col = "green")

  # Standardised component vs the +/-1.96 significance bands.
  z <- seasonality.filt2 / sd.seasonality.filt2
  plot(z, ylim = c(min(min(z), -1.96), max(max(z), 1.96)))
  abline(h = 1.96, lty = 2)
  abline(h = -1.96, lty = 2)
}

###################
#   Forecasting   #
###################

# 28-day-ahead forecast from the filtered CA model, with 95% intervals.
predictions <- dlmForecast(log.CA.filt2, n=28)

ll = predictions$f - 1.96 * sqrt(unlist(predictions$Q))
ul = predictions$f + 1.96 * sqrt(unlist(predictions$Q))
plot(log.CA.ts, xlab = "", col = "darkgrey",xlim=c(250,300),lwd=2)
lines(predictions$f, col="red",lwd=2)
lines(ll,lty=2, col="green",lwd=2)
lines(ul,lty=2, col="green",lwd=2)

######################################################
#  One-step ahead forecast errors, last 109 in-sample days   #
######################################################
print(" Mean absolute forecast error")
## [1] " Mean absolute forecast error"
# Mean absolute forecast error (MAE)
mean(abs(log.CA.filt2$f[1800:1908] - log.CA.ts[1800:1908]))
## [1] 0.06420369
print(" Mean squared forecast error (MSE)")
## [1] " Mean squared forecast error (MSE)"
# Mean squared forecast error (MSE)
mean((log.CA.filt2$f[1800:1908] - log.CA.ts[1800:1908])^2)
## [1] 0.007054698
print(" Mean absolute percentage forecast error (MAPE)")
## [1] " Mean absolute percentage forecast error (MAPE)"
# Mean absolute percentage forecast error (MAPE)
mean(abs(log.CA.filt2$f[1800:1908] - log.CA.ts[1800:1908]) / log.CA.ts[1800:1908])
## [1] 0.006566335
# Filtered one-step forecasts overlaid on the observed series.
plot(log.CA.filt2$f,ylim=c(9,11))
lines(log.CA.ts,col="green")

# Model diagnostics for the CA fit.
# Get standardised one-step-ahead forecast errors
res <- residuals(log.CA.filt2, sd=FALSE)

# Plot one-step ahead forecast errors

plot(res,type='h'); abline(h=0)

# Autocorrelation structure of the errors: a well-specified model should
# leave approximately white noise.
par(mfrow=c(1,2))
acf(res,lag.max = 260)
pacf(res,lag.max = 260)

# Plot qq-plot of one-step ahead forecast errors
qqnorm(res); qqline(res)


# Test normality with the Shapiro-Wilk normality test
# H_0: errors are normally distributed
# H_A: errors are not normally distributed
shapiro.test(res)
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.9494, p-value < 2.2e-16
# Test autocorrelation with the Ljung-Box test
# H_0: errors are independent
# H_A: errors exhibit serial correlation
 Box.test(res, lag=20, type="Ljung")   
## 
##  Box-Ljung test
## 
## data:  res
## X-squared = 207.97, df = 20, p-value < 2.2e-16
# Per-lag Ljung-Box p-values for lags 1..20.
sapply(1 : 20, function(i)
       Box.test(res, lag = i, type = "Ljung-Box")$p.value)
##  [1] 4.912737e-13 9.514611e-14 0.000000e+00 0.000000e+00 0.000000e+00
##  [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [11] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [16] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00

# DLM with polynomial second-order trend and seasonality modeled with a seasonal factor representation.
# Same specification as the CA model; `build` is redefined identically so
# this chunk can run standalone.
build <- function(parm) {
  dlmModPoly(order = 2, dV = exp(parm[1]), dW = c(exp(parm[2]),exp(parm[3]))) + dlmModTrig(s = 7, dV = 0, dW=exp(parm[4]))
}
# Maximum-likelihood fit for the WI series; convergence code 0 = success.
fit <- dlmMLE(log.WI.ts, rep(0,4), build)
fit$convergence
## [1] 0
# BIC = -2 log-lik + k log(n); dlmMLE's $value is the negative log-lik.
BIC.2nd.seasfactor <-  2 *  fit$value + length(fit$par) * log(length(log.WI.ts))
print("BIC")
## [1] "BIC"
BIC.2nd.seasfactor
## [1] -6662.341
model.WI <- build(fit$par)  # rebuild the model at the MLE parameter values
print("Observational noise from MLE")
## [1] "Observational noise from MLE"
model.WI$V
##             [,1]
## [1,] 0.005522154
print("Innovation variance matrix diagonal elements from MLE")
## [1] "Innovation variance matrix diagonal elements from MLE"
model.WI$W[1:2,1:2]
##              [,1]         [,2]
## [1,] 0.0007825436 0.000000e+00
## [2,] 0.0000000000 1.772021e-14
# Kalman filter of the WI series under the fitted model.
log.WI.filt2 <- dlmFilter(log.WI.ts, model.WI)

# Filtering covariance matrices for the WI model, recovered from the SVD
# factors stored by dlmFilter.
cov.filt <- with(log.WI.filt2, dlmSvd2var(U.C, D.C))

# Inspect each harmonic of the weekly seasonal component (state indices
# 2, 4, 6, 8). For every harmonic we draw (a) the filtered estimate with
# a 95% band and (b) the standardised estimate against +/-1.96, skipping
# the first 8 time points while the filter burns in.
for (seas.term in c(2, 4, 6, 8)) {
  sd.seasonality.filt2 <- rep(NA, length(cov.filt))
  for (i in seq_along(cov.filt)) {
    sd.seasonality.filt2[i] <- sqrt(cov.filt[[i]][seas.term, seas.term])
  }
  sd.seasonality.filt2 <- ts(sd.seasonality.filt2[-(1:8)], frequency = 7)
  seasonality.filt2 <- ts(log.WI.filt2$m[-(1:8), seas.term], frequency = 7)

  # Filtered component with 95% interval.
  ll <- seasonality.filt2 - 1.96 * sd.seasonality.filt2
  ul <- seasonality.filt2 + 1.96 * sd.seasonality.filt2
  plot(seasonality.filt2, ylim = c(min(ll), max(ul)))
  lines(ll, lty = 2, col = "green")
  lines(ul, lty = 2, col = "green")

  # Standardised component vs the +/-1.96 significance bands.
  z <- seasonality.filt2 / sd.seasonality.filt2
  plot(z, ylim = c(min(min(z), -1.96), max(max(z), 1.96)))
  abline(h = 1.96, lty = 2)
  abline(h = -1.96, lty = 2)
}

###################
#   Forecasting   #
###################

# 28-day-ahead forecast from the filtered WI model, with 95% intervals.
predictions <- dlmForecast(log.WI.filt2, n=28)

ll = predictions$f - 1.96 * sqrt(unlist(predictions$Q))
ul = predictions$f + 1.96 * sqrt(unlist(predictions$Q))
plot(log.WI.ts, xlab = "", col = "darkgrey",xlim=c(250,300),lwd=2)
lines(predictions$f, col="red",lwd=2)
lines(ll,lty=2, col="green",lwd=2)
lines(ul,lty=2, col="green",lwd=2)

######################################################
#  One-step ahead forecast errors, last 109 in-sample days   #
######################################################
print(" Mean absolute forecast error")
## [1] " Mean absolute forecast error"
# Mean absolute forecast error (MAE)
mean(abs(log.WI.filt2$f[1800:1908] - log.WI.ts[1800:1908]))
## [1] 0.07188769
print(" Mean squared forecast error (MSE)")
## [1] " Mean squared forecast error (MSE)"
# Mean squared forecast error (MSE)
mean((log.WI.filt2$f[1800:1908] - log.WI.ts[1800:1908])^2)
## [1] 0.007758874
print(" Mean absolute percentage forecast error (MAPE)")
## [1] " Mean absolute percentage forecast error (MAPE)"
# Mean absolute percentage forecast error (MAPE)
mean(abs(log.WI.filt2$f[1800:1908] - log.WI.ts[1800:1908]) / log.WI.ts[1800:1908])
## [1] 0.007733446
# Filtered one-step forecasts overlaid on the observed series.
plot(log.WI.filt2$f,ylim=c(9,11))
lines(log.WI.ts,col="green")

# Model diagnostics for the WI fit.
# Get standardised one-step-ahead forecast errors
res <- residuals(log.WI.filt2, sd=FALSE)

# Plot one-step ahead forecast errors

plot(res,type='h'); abline(h=0)

# Autocorrelation structure of the errors: a well-specified model should
# leave approximately white noise.
par(mfrow=c(1,2))
acf(res,lag.max = 260)
pacf(res,lag.max = 260)

# Plot qq-plot of one-step ahead forecast errors
qqnorm(res); qqline(res)


# Test normality with the Shapiro-Wilk normality test
# H_0: errors are normally distributed
# H_A: errors are not normally distributed
shapiro.test(res)
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.9806, p-value = 2.015e-15
# Test autocorrelation with the Ljung-Box test
# H_0: errors are independent
# H_A: errors exhibit serial correlation
 Box.test(res, lag=20, type="Ljung")   
## 
##  Box-Ljung test
## 
## data:  res
## X-squared = 263.15, df = 20, p-value < 2.2e-16
# Per-lag Ljung-Box p-values for lags 1..20.
sapply(1 : 20, function(i)
       Box.test(res, lag = i, type = "Ljung-Box")$p.value)
##  [1] 1.930376e-04 2.654955e-04 1.358913e-13 2.262635e-13 0.000000e+00
##  [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [11] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [16] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00

# Same DLM specification as CA/WI: second-order polynomial trend plus a
# trigonometric weekly seasonal; redefined so the chunk runs standalone.
build <- function(parm) {
  dlmModPoly(order = 2, dV = exp(parm[1]), dW = c(exp(parm[2]),exp(parm[3]))) + dlmModTrig(s = 7, dV = 0, dW=exp(parm[4]))
}
# Maximum-likelihood fit for the TX series; convergence code 0 = success.
fit <- dlmMLE(log.TX.ts, rep(0,4), build)
fit$convergence
## [1] 0
# BIC = -2 log-lik + k log(n); dlmMLE's $value is the negative log-lik.
BIC.2nd.seasfactor <-  2 *  fit$value + length(fit$par) * log(length(log.TX.ts))
print("BIC")
## [1] "BIC"
BIC.2nd.seasfactor
## [1] -5512.191
model.TX<- build(fit$par)  # rebuild the model at the MLE parameter values
print("Observational noise from MLE")
## [1] "Observational noise from MLE"
model.TX$V
##             [,1]
## [1,] 0.008504631
print("Innovation variance matrix diagonal elements from MLE")
## [1] "Innovation variance matrix diagonal elements from MLE"
model.TX$W[1:2,1:2]
##             [,1]         [,2]
## [1,] 0.003024094 0.000000e+00
## [2,] 0.000000000 5.027449e-14
# Kalman filter of the TX series under the fitted model.
log.TX.filt2 <- dlmFilter(log.TX.ts, model.TX)

# Filtering covariance matrices for the TX model, recovered from the SVD
# factors stored by dlmFilter.
cov.filt <- with(log.TX.filt2, dlmSvd2var(U.C, D.C))

# Inspect each harmonic of the weekly seasonal component (state indices
# 2, 4, 6, 8). For every harmonic we draw (a) the filtered estimate with
# a 95% band and (b) the standardised estimate against +/-1.96, skipping
# the first 8 time points while the filter burns in.
for (seas.term in c(2, 4, 6, 8)) {
  sd.seasonality.filt2 <- rep(NA, length(cov.filt))
  for (i in seq_along(cov.filt)) {
    sd.seasonality.filt2[i] <- sqrt(cov.filt[[i]][seas.term, seas.term])
  }
  sd.seasonality.filt2 <- ts(sd.seasonality.filt2[-(1:8)], frequency = 7)
  seasonality.filt2 <- ts(log.TX.filt2$m[-(1:8), seas.term], frequency = 7)

  # Filtered component with 95% interval.
  ll <- seasonality.filt2 - 1.96 * sd.seasonality.filt2
  ul <- seasonality.filt2 + 1.96 * sd.seasonality.filt2
  plot(seasonality.filt2, ylim = c(min(ll), max(ul)))
  lines(ll, lty = 2, col = "green")
  lines(ul, lty = 2, col = "green")

  # Standardised component vs the +/-1.96 significance bands.
  z <- seasonality.filt2 / sd.seasonality.filt2
  plot(z, ylim = c(min(min(z), -1.96), max(max(z), 1.96)))
  abline(h = 1.96, lty = 2)
  abline(h = -1.96, lty = 2)
}

###################
#   Forecasting   #
###################

# 28-day-ahead forecast from the filtered TX model, with 95% intervals.
predictions <- dlmForecast(log.TX.filt2, n=28)

ll = predictions$f - 1.96 * sqrt(unlist(predictions$Q))
ul = predictions$f + 1.96 * sqrt(unlist(predictions$Q))
plot(log.TX.ts, xlab = "", col = "darkgrey",xlim=c(250,300),lwd=2)
lines(predictions$f, col="red",lwd=2)
lines(ll,lty=2, col="green",lwd=2)
lines(ul,lty=2, col="green",lwd=2)

######################################################
#  One-step ahead forecast errors, last 109 in-sample days   #
######################################################
print(" Mean absolute forecast error")
## [1] " Mean absolute forecast error"
# Mean absolute forecast error (MAE)
mean(abs(log.TX.filt2$f[1800:1908] - log.TX.ts[1800:1908]))
## [1] 0.09847063
print(" Mean squared forecast error (MSE)")
# BUG FIX: the MSE, MAPE and the plot below originally used the WI filter
# and series (copy-paste from the previous section), so this section
# reported WI numbers under the TX heading. They now evaluate the TX
# model; the cached "## [1] ..." outputs for those lines were removed
# because they showed the stale WI values.
mean((log.TX.filt2$f[1800:1908] - log.TX.ts[1800:1908])^2)
print(" Mean absolute percentage forecast error (MAPE)")
# Mean absolute percentage forecast error (MAPE)
mean(abs(log.TX.filt2$f[1800:1908] - log.TX.ts[1800:1908]) / log.TX.ts[1800:1908])
# Filtered one-step forecasts overlaid on the observed series.
plot(log.TX.filt2$f,ylim=c(8,10))
lines(log.TX.ts,col="green")

# Model diagnostics for the TX fit.
# Get standardised one-step-ahead forecast errors
res <- residuals(log.TX.filt2, sd=FALSE)

# Plot one-step ahead forecast errors

plot(res,type='h'); abline(h=0)

# Autocorrelation structure of the errors: a well-specified model should
# leave approximately white noise.
par(mfrow=c(1,2))
acf(res,lag.max = 260)
pacf(res,lag.max = 260)

# Plot qq-plot of one-step ahead forecast errors
qqnorm(res); qqline(res)


# Test normality with the Shapiro-Wilk normality test
# H_0: errors are normally distributed
# H_A: errors are not normally distributed
shapiro.test(res)
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.97685, p-value < 2.2e-16
# Test autocorrelation with the Ljung-Box test
# H_0: errors are independent
# H_A: errors exhibit serial correlation
 Box.test(res, lag=20, type="Ljung")   
## 
##  Box-Ljung test
## 
## data:  res
## X-squared = 294.26, df = 20, p-value < 2.2e-16
# Per-lag Ljung-Box p-values for lags 1..20.
sapply(1 : 20, function(i)
       Box.test(res, lag = i, type = "Ljung-Box")$p.value)
##  [1] 1.300037e-04 1.020206e-11 5.206946e-14 0.000000e+00 0.000000e+00
##  [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [11] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## [16] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00